(Gilbert et al. 2014)[https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4253859/] treated K562 cells and compared ricin treated to untreated cells to find genes that contribute to resistance and susceptibility of ricin. They also sequenced the day 0 population. In theory, if we compare the untreated population to the initial population then essential genes should be depleted among the guides. We will use this data as a baseline to compare methods, as essential genes are a well studied class of genes.
WeissmanCRISPRiRicinTreatment = read.table(file = "~/sgRNA/sgRNA2Groups/data/Weissman/WeissmanCRISPRiRicinTreatment.txt", header = TRUE)
head(WeissmanCRISPRiRicinTreatment)
## sgId
## 1 Apoptosis+Cancer+Other_Cancer=A2M_+_9268488.25-all~e39m1
## 2 Apoptosis+Cancer+Other_Cancer=A2M_+_9268495.24-all~e39m1
## 3 Apoptosis+Cancer+Other_Cancer=A2M_+_9268513.26-all~e39m1
## 4 Apoptosis+Cancer+Other_Cancer=A2M_+_9268524.25-all~e39m1
## 5 Apoptosis+Cancer+Other_Cancer=A2M_+_9268659.23-all~e39m1
## 6 Apoptosis+Cancer+Other_Cancer=A2M_+_9268708.26-all~e39m1
## sequence gene T0JEV T0MH treatedJEV treatedMH
## 1 GACCAGATGGATTGTAGGGAGT A2M 988 1114 813 1116
## 2 GTTTGGGACCAGATGGATTGT A2M 587 592 356 526
## 3 GCCCAGTTGCTTTGGGAAGTGTT A2M 843 909 818 721
## 4 GCAGCATAAAGCCCAGTTGCTT A2M 572 655 848 797
## 5 GGGATTCTATTTAGCCCGCC A2M 431 417 349 396
## 6 GTACGGTAAAAGTGAGCTCTTAC A2M 191 159 59 248
## untreatedJEV untreatedMH
## 1 1131 1050
## 2 670 507
## 3 1057 867
## 4 855 690
## 5 394 411
## 6 217 167
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0Counts = WeissmanCRISPRiRicinTreatment[ ,c("sequence", "gene", "T0JEV", "T0MH", "untreatedJEV", "untreatedMH")]
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0Counts = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0Counts[-which(rowSums(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0Counts[ ,3:6]) < 50), ]
write.table(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0Counts[-which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0Counts$gene == "negative_control"), ], file = "~/sgRNA/sgRNA2Groups/data/Weissman/WeissmanCRISPRiRicinTreatmentUntreatedVsDay0GeneCounts.txt", quote = FALSE, sep = "\t", col.names = TRUE, row.names = FALSE)
write.table(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0Counts[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0Counts$gene == "negative_control"), ], file = "~/sgRNA/sgRNA2Groups/data/Weissman/WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NegCtrlCounts.txt", quote = FALSE, sep = "\t", col.names = TRUE, row.names = FALSE)
library(DESeq2)
## Loading required package: S4Vectors
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, cbind, colMeans,
## colnames, colSums, do.call, duplicated, eval, evalq, Filter,
## Find, get, grep, grepl, intersect, is.unsorted, lapply,
## lengths, Map, mapply, match, mget, order, paste, pmax,
## pmax.int, pmin, pmin.int, Position, rank, rbind, Reduce,
## rowMeans, rownames, rowSums, sapply, setdiff, sort, table,
## tapply, union, unique, unsplit, which, which.max, which.min
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:base':
##
## expand.grid
## Loading required package: IRanges
## Loading required package: GenomicRanges
## Warning: package 'GenomicRanges' was built under R version 3.4.3
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Warning: package 'SummarizedExperiment' was built under R version 3.4.3
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: DelayedArray
## Loading required package: matrixStats
##
## Attaching package: 'matrixStats'
## The following objects are masked from 'package:Biobase':
##
## anyMissing, rowMedians
##
## Attaching package: 'DelayedArray'
## The following objects are masked from 'package:matrixStats':
##
## colMaxs, colMins, colRanges, rowMaxs, rowMins, rowRanges
## The following object is masked from 'package:base':
##
## apply
counts = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0Counts[, 3:6]
coldata = data.frame(condition = c(0, 0, 1, 1), rep = c(0, 1, 0, 1))
rownames(coldata) = colnames(counts)
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0DESeq = DESeq2::DESeqDataSetFromMatrix(countData = counts,
colData = coldata,
design = ~ condition)
## the design formula contains a numeric variable with integer values,
## specifying a model with increasing fold change for higher values.
## did you mean for this to be a factor? if so, first convert
## this variable to a factor using the factor() function
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0DESeq = DESeq2::DESeq(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0DESeq)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0DESeq = DESeq2::results(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0DESeq)
log2fc = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0DESeq$log2FoldChange
b = seq(from = min(log2fc) - 0.1, to = max(log2fc) + 0.1, length = 81)
negCtrl = log2fc[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0Counts$gene == "negative_control")]
log2fc = log2fc[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0Counts$gene != "negative_control")]
geneIds = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0Counts$gene[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0Counts$gene != "negative_control")]
geneIds = factor(geneIds, levels = unique(geneIds))
hist(negCtrl, breaks = b, probability = TRUE, col = rgb(1, 0, 0, 0.7), xlab = "log2fc", main = "negative control vs gene targetting guides")
hist(log2fc, breaks = b, probability = TRUE, add = TRUE, col = rgb(0, 1, 0, 0.7))
legend("topleft", legend = c("negative control", "gene targetting"), lwd = 0, lty = 0, pch = 16, col = c(rgb(1, 0, 0, 0.7), rgb(0, 1, 0, 0.7)))
min(negCtrl)
## [1] -5.255485
min(log2fc)
## [1] -10.99162
There is a clear enrichment of depleted guides compared to the negative control guides. Let’s apply and take a look at the results of different algorithms.
design_matrix = data.frame(Samples = rownames(coldata), baseline = c(1, 1, 1, 1), condition = coldata$condition)
write.table(design_matrix, file = "~/sgRNA/sgRNA2Groups/data/Weissman/design_matrix.txt", quote = FALSE, sep = "\t", col.names = TRUE, row.names = FALSE)
system("~/sgRNA/sgRNA2Groups/data/mageck/mageck-0.5.6/bin/mageck mle -k ~/sgRNA/sgRNA2Groups/data/Weissman/WeissmanCRISPRiRicinTreatmentUntreatedVsDay0GeneCounts.txt -d ~/sgRNA/sgRNA2Groups/data/Weissman/design_matrix.txt --output-prefix ~/sgRNA/sgRNA2Groups/data/Weissman/WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle --control-sgrna ~/sgRNA/sgRNA2Groups/data/Weissman/WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NegCtrlCounts.txt")
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle = read.table(file = "~/sgRNA/sgRNA2Groups/data/Weissman/WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.gene_summary.txt", header = TRUE)
head(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle)
## Gene sgRNA condition.beta condition.z condition.p.value condition.fdr
## 1 RNF14 20 0.106220 4.31670 0.0012645 0.033611
## 2 DUOXA1 10 0.019616 0.58528 0.3902300 0.875420
## 3 RNF17 10 0.104460 2.98260 0.0014523 0.038095
## 4 RNF10 20 0.016656 0.68569 0.4333900 0.886710
## 5 RNF11 10 0.016568 0.48065 0.4345700 0.887640
## 6 RNF13 10 0.040640 1.17980 0.1515500 0.691240
## condition.wald.p.value condition.wald.fdr
## 1 1.5836e-05 0.00017024
## 2 5.5836e-01 0.85116000
## 3 2.8577e-03 0.02196900
## 4 4.9291e-01 0.81930000
## 5 6.3077e-01 0.88152000
## 6 2.3810e-01 0.60645000
#hist(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle$condition.wald.p.value, breaks = 80, col = "grey")
hist(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle$condition.p.value, breaks = 80, col = "grey")
system("~/sgRNA/sgRNA2Groups/data/mageck/mageck-0.5.6/bin/mageck test -k ~/sgRNA/sgRNA2Groups/data/Weissman/WeissmanCRISPRiRicinTreatmentUntreatedVsDay0GeneCounts.txt -t T0JEV,T0MH --output-prefix ~/sgRNA/sgRNA2Groups/data/Weissman/WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckRra --control-sgrna ~/sgRNA/sgRNA2Groups/data/Weissman/WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NegCtrlCounts.txt")
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckRra = read.table(file = "~/sgRNA/sgRNA2Groups/data/Weissman/WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckRra.gene_summary.txt", header = TRUE)
head(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckRra)
## id num neg.score neg.p.value neg.fdr neg.rank neg.goodsgrna
## 1 SPI1 10 2.4991e-17 3.1001e-07 0 1 9
## 2 C12orf66 10 2.1441e-14 3.1001e-07 0 2 8
## 3 MTF2 10 2.8904e-13 3.1001e-07 0 3 10
## 4 STAG2 25 1.5763e-12 3.1001e-07 0 4 18
## 5 BPGM 10 2.1601e-10 3.1001e-07 0 5 8
## 6 STAT5A 20 2.0139e-09 3.1001e-07 0 6 15
## neg.lfc pos.score pos.p.value pos.fdr pos.rank pos.goodsgrna pos.lfc
## 1 -0.49186 0.96896 0.0014418 0.001442 15114 0 -0.49186
## 2 -0.55801 0.99838 0.0014418 0.001442 15901 0 -0.55801
## 3 -0.38311 1.00000 0.0014418 0.001442 15968 0 -0.38311
## 4 -0.22078 0.92604 0.0014418 0.001442 14231 2 -0.22078
## 5 -0.24166 0.36034 0.0014412 0.001442 6939 1 -0.24166
## 6 -0.19751 1.00000 0.0014418 0.001442 15969 0 -0.19751
hist(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckRra$neg.p.value, breaks = 80, col = "grey")
hist(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckRra$pos.p.value, breaks = 80, col = "grey")
library(CRISPhieRmix)
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmix = CRISPhieRmix(x = log2fc, geneIds = geneIds, negCtrl = negCtrl, mu = -6, sigma = 2, pq = 0.05, VERBOSE = TRUE, PLOT = TRUE, nMesh = 20)
## fit negative control distributions
## 2 groups
## EM converged
## mu = -1.678681
## sigma = 1.361389
## pq = 0.0538104
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores = data.frame(gene = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmix$genes, locfdr = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmix$locfdr)
hist(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores$locfdr, breaks = 80, col = "grey")
head(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores[order(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores$locfdr, decreasing = FALSE), ], 10)
## gene locfdr
## ANAPC4 ANAPC4 0
## ANAPC5 ANAPC5 0
## AQR AQR 0
## ASCC3 ASCC3 0
## ATF5 ATF5 0
## BCAS2 BCAS2 0
## CAD CAD 0
## CASP8AP2 CASP8AP2 0
## CDC20 CDC20 0
## CDC27 CDC27 0
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0Normalmix = CRISPhieRmix(x = log2fc, geneIds = geneIds, mu = -6, sigma = 2, pq = 0.05, VERBOSE = TRUE, PLOT = TRUE, nMesh = 20)
## no negative controls provided, fitting hierarchical normal model
## Loading required package: mixtools
## mixtools package, version 1.1.0, Released 2017-03-10
## This package is based upon work supported by the National Science Foundation under Grant No. SES-0518772.
## number of iterations= 20
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores = data.frame(gene = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0Normalmix$genes, locfdr = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0Normalmix$locfdr)
hist(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores$locfdr, breaks = 80, col = "grey")
head(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores[order(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores$locfdr, decreasing = FALSE), ], 10)
## gene locfdr
## AATF AATF 0
## ABCA3 ABCA3 0
## ABCB10 ABCB10 0
## ABTB1 ABTB1 0
## ACTG1 ACTG1 0
## ACTR8 ACTR8 0
## ADAMTSL3 ADAMTSL3 0
## ADCY3 ADCY3 0
## ADCY4 ADCY4 0
## ADSS ADSS 0
MannWhitneyPvals = sapply(unique(geneIds), function(g) return(wilcox.test(x = log2fc[which(geneIds == g)], negCtrl, paired = FALSE)$p.value))
names(MannWhitneyPvals) = unique(geneIds)
hist(MannWhitneyPvals, breaks = 40, col = "grey")
MannWhitneyFdrs = p.adjust(MannWhitneyPvals, method = "BH")
head(sort(MannWhitneyFdrs, decreasing = FALSE), 20)
## RPL17 ASNA1 RPL31 LSM6 COPB1
## 9.615331e-12 1.675838e-11 2.735450e-11 4.450390e-11 5.259081e-11
## SNRPF HSD17B10 HSPA9 RPL10A AQR
## 1.488890e-10 2.782676e-10 3.048158e-10 5.597503e-10 7.115365e-10
## NOP2 SNRNP70 TANGO6 LAS1L CLNS1A
## 7.115365e-10 7.228967e-10 7.228967e-10 7.869923e-10 9.601214e-10
## INCENP CYB5B NRDE2 KPNB1 PSMA1
## 9.872310e-10 1.417373e-09 1.429930e-09 2.199799e-09 2.510920e-09
top3MannWhitneyPval <- function(x, negCtrl){
top3x = head(sort(x, decreasing = FALSE), 3)
topNegCtrl = head(sort(negCtrl, decreasing = FALSE), round(3*length(negCtrl)/length(x)))
return(wilcox.test(top3x, topNegCtrl, paired = FALSE)$p.value)
}
top3MannWhitneyPvals = sapply(unique(geneIds), function(g) return(top3MannWhitneyPval(x = log2fc[which(geneIds == g)], negCtrl = negCtrl)))
names(top3MannWhitneyPvals) = unique(geneIds)
top3MannWhitneyFdrs = p.adjust(top3MannWhitneyPvals, method = "BH")
head(sort(top3MannWhitneyFdrs, decreasing = FALSE), 20)
## AATF ACTR8 ADCYAP1 AMBP ANAPC13 ANAPC1
## 0.04193475 0.04193475 0.04193475 0.04193475 0.04193475 0.04193475
## ANAPC4 ANAPC5 AQR ARGLU1 ASCC3 ATF5
## 0.04193475 0.04193475 0.04193475 0.04193475 0.04193475 0.04193475
## ATG3 ATP6V1G1 ATXN10 BANP BCAS2 BCCIP
## 0.04193475 0.04193475 0.04193475 0.04193475 0.04193475 0.04193475
## BCL2L14 BCL6
## 0.04193475 0.04193475
We can use the essential genes from Evers et al. 2016 to estimate proxy measures for false positive rates and false negative rates.
dim(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores)
## [1] 15975 2
EssentialGenes = read.table(file = "~/sgRNA/sgRNA2Groups/data/Evers2016/EssentialGenes.txt", header = TRUE)
head(EssentialGenes)
## gene essential
## 1 RPS24 1
## 2 RPL11 1
## 3 PSMA3 1
## 4 RPL34 1
## 5 RPS8 1
## 6 RPL30 1
dim(EssentialGenes)
## [1] 93 2
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores[match(EssentialGenes$gene, WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores$gene), ]
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential[-which(is.na(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$gene)), ]
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential = data.frame(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential, essential = EssentialGenes$essential[which(EssentialGenes$gene %in% WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$gene)])
dim(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential)
## [1] 67 3
library(ggjoy)
## Loading required package: ggplot2
## Loading required package: ggridges
## The ggjoy package has been deprecated. Please switch over to the
## ggridges package, which provides the same functionality. Porting
## guidelines can be found here:
## https://github.com/clauswilke/ggjoy/blob/master/README.md
library(ggplot2)
b = seq(from = 0, to = 1, length = 41)
hist(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$locfdr[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$essential == 0)], breaks = b, col = rgb(1, 0, 0, 0.7), main = "essential vs nonessential genes", xlab = "locfdr", ylim = c(0, 40))
hist(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$locfdr[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$essential == 1)], breaks = b, col = rgb(0, 1, 0, 0.7), add = TRUE)
estimatedFdr = sapply(1:dim(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential)[1], function(i) mean(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores$locfdr[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores$locfdr <= WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$locfdr[i])]))
1 - sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$essential[which(estimatedFdr < 0.2)])/sum(estimatedFdr < 0.2)
## [1] 0
library(pROC)
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following objects are masked from 'package:IRanges':
##
## cov, var
## The following objects are masked from 'package:S4Vectors':
##
## cov, var
## The following object is masked from 'package:BiocGenerics':
##
## var
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential.roc = roc(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$essential, WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$locfdr)
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential.roc
##
## Call:
## roc.default(response = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$essential, predictor = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$locfdr)
##
## Data: WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$locfdr in 21 controls (WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$essential 0) > 46 cases (WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$essential 1).
## Area under the curve: 0.9772
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle[match(EssentialGenes$gene, WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle$Gene), ]
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential[-which(is.na(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$Gene)), ]
dim(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential)
## [1] 67 8
head(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential)
## Gene sgRNA condition.beta condition.z condition.p.value
## 10683 RPS24 20 -1.03990 -38.0170 0.0000e+00
## 5703 RPL11 20 -0.61181 -25.2040 5.0078e-05
## 8747 PSMA3 20 -0.62255 -23.5930 3.7559e-05
## 13912 RPL34 20 -0.67810 -27.0830 1.2520e-05
## 11850 RPS8 20 -0.11783 -4.9472 2.8248e-01
## 5111 RPL30 20 -0.59654 -23.4070 6.2598e-05
## condition.fdr condition.wald.p.value condition.wald.fdr
## 10683 0.00000000 0.0000e+00 0.0000e+00
## 5703 0.00225350 3.6258e-140 2.7321e-138
## 8747 0.00174420 4.5397e-123 3.0600e-121
## 13912 0.00059524 1.5500e-161 1.3989e-159
## 11850 0.83158000 7.5299e-07 9.0785e-06
## 5111 0.00268100 3.6412e-121 2.4236e-119
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential = data.frame(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential, essential = EssentialGenes$essential[which(EssentialGenes$gene %in% WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$Gene)])
dim(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential)
## [1] 67 9
head(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential)
## Gene sgRNA condition.beta condition.z condition.p.value
## 10683 RPS24 20 -1.03990 -38.0170 0.0000e+00
## 5703 RPL11 20 -0.61181 -25.2040 5.0078e-05
## 8747 PSMA3 20 -0.62255 -23.5930 3.7559e-05
## 13912 RPL34 20 -0.67810 -27.0830 1.2520e-05
## 11850 RPS8 20 -0.11783 -4.9472 2.8248e-01
## 5111 RPL30 20 -0.59654 -23.4070 6.2598e-05
## condition.fdr condition.wald.p.value condition.wald.fdr essential
## 10683 0.00000000 0.0000e+00 0.0000e+00 1
## 5703 0.00225350 3.6258e-140 2.7321e-138 1
## 8747 0.00174420 4.5397e-123 3.0600e-121 1
## 13912 0.00059524 1.5500e-161 1.3989e-159 1
## 11850 0.83158000 7.5299e-07 9.0785e-06 1
## 5111 0.00268100 3.6412e-121 2.4236e-119 1
hist(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.p.value[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$essential == 0)], breaks = b, col = rgb(1, 0, 0, 0.7), main = "essential vs nonessential genes", xlab = "permutation p-value", ylim = c(0, 40))
hist(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.p.value[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$essential == 1)], breaks = b, col = rgb(0, 1, 0, 0.7), add = TRUE)
#hist(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.wald.p.value[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$essential == 0)], breaks = b, col = rgb(1, 0, 0, 0.7), main = "essential vs nonessential genes", xlab = "wald p-value", ylim = c(0, 50))
#hist(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.wald.p.value[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$essential == 1)], breaks = b, col = rgb(0, 1, 0, 0.7), add = TRUE)
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential.perm.roc = roc(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$essential, WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr)
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential.perm.roc
##
## Call:
## roc.default(response = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$essential, predictor = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr)
##
## Data: WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr in 21 controls (WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$essential 0) > 46 cases (WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$essential 1).
## Area under the curve: 0.9586
1 - sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$essential[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr < 0.2)])/sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr < 0.2)
## [1] 0
#WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential.wald.roc = roc(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$essential, WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.wald.fdr)
#WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential.wald.roc
#1 - sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$essential[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.wald.fdr < 0.2)])/sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.wald.fdr < 0.2)
The performance of all is similar. They all pretty much correctly identify the essential genes. To get better estimates we’ll take the list of essential genes directly from Hart et al 2015.
ConstitutiveCoreEssentialGenes = scan("~/sgRNA/sgRNA2Groups/data/Weissman/ConstitutiveCoreEssentialGenes.txt", what = character())
length(ConstitutiveCoreEssentialGenes)
## [1] 217
head(ConstitutiveCoreEssentialGenes)
## [1] "ALYREF" "AQR" "ARCN1" "BRIX1" "C12orf66" "CCT3"
tail(ConstitutiveCoreEssentialGenes)
## [1] "XAB2" "XPO1" "YY1" "ZBTB48" "ZC3H13" "ZNF207"
NonEssentialGenes = scan("~/sgRNA/sgRNA2Groups/data/Weissman/NonEssentialGenes.txt", what = character())
EssentialGenes = data.frame(genes = factor(c(sapply(ConstitutiveCoreEssentialGenes, toString), sapply(NonEssentialGenes, toString))), essential = c(rep(1, times = length(ConstitutiveCoreEssentialGenes)), rep(0, times = length(NonEssentialGenes))))
dim(EssentialGenes)
## [1] 1144 2
head(EssentialGenes)
## genes essential
## ALYREF ALYREF 1
## AQR AQR 1
## ARCN1 ARCN1 1
## BRIX1 BRIX1 1
## C12orf66 C12orf66 1
## CCT3 CCT3 1
tail(EssentialGenes)
## genes essential
## ZNF679 ZNF679 0
## ZNF804B ZNF804B 0
## ZNRF4 ZNRF4 0
## ZP2 ZP2 0
## ZP4 ZP4 0
## ZSWIM2 ZSWIM2 0
sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmix$genes %in% ConstitutiveCoreEssentialGenes)
## [1] 217
sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmix$genes %in% NonEssentialGenes)
## [1] 460
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores[match(EssentialGenes$genes, WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores$gene), ]
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential[-which(is.na(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$gene)), ]
dim(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential)
## [1] 677 2
head(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential)
## gene locfdr
## ALYREF ALYREF 0.01962154
## AQR AQR 0.00000000
## ARCN1 ARCN1 0.00000000
## BRIX1 BRIX1 0.97280497
## C12orf66 C12orf66 0.09600878
## CCT3 CCT3 0.02372761
EssentialGenes = EssentialGenes[which(EssentialGenes$genes %in% WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$gene), ]
dim(EssentialGenes)
## [1] 677 2
head(EssentialGenes)
## genes essential
## ALYREF ALYREF 1
## AQR AQR 1
## ARCN1 ARCN1 1
## BRIX1 BRIX1 1
## C12orf66 C12orf66 1
## CCT3 CCT3 1
library(pROC)
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential.roc = roc(EssentialGenes$essential, WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$locfdr)
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential.roc
##
## Call:
## roc.default(response = EssentialGenes$essential, predictor = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$locfdr)
##
## Data: WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$locfdr in 460 controls (EssentialGenes$essential 0) > 217 cases (EssentialGenes$essential 1).
## Area under the curve: 0.9104
estimatedFdr = sapply(1:dim(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential)[1], function(i) mean(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores$locfdr[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores$locfdr <= WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$locfdr[i])]))
AllEstimatedFdr = sapply(1:length(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores$locfdr), function(i) mean(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores$locfdr[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores$locfdr <= WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores$locfdr[i])]))
length(estimatedFdr)
## [1] 677
head(estimatedFdr)
## [1] 0.0005734193 0.0000000000 0.0000000000 0.8374039915 0.0070784015
## [6] 0.0008534623
head(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential)
## gene locfdr
## ALYREF ALYREF 0.01962154
## AQR AQR 0.00000000
## ARCN1 ARCN1 0.00000000
## BRIX1 BRIX1 0.97280497
## C12orf66 C12orf66 0.09600878
## CCT3 CCT3 0.02372761
1 - sum(EssentialGenes$essential[which(estimatedFdr < 0.1)])/sum(estimatedFdr < 0.1)
## [1] 0.01149425
sum(estimatedFdr < 0.1)
## [1] 174
sum(EssentialGenes$essential[which(estimatedFdr < 0.1)])
## [1] 172
1 - sum(EssentialGenes$essential[which(estimatedFdr < 0.2)])/sum(estimatedFdr < 0.2)
## [1] 0.02762431
sum(estimatedFdr < 0.2)
## [1] 181
sum(EssentialGenes$essential[which(estimatedFdr < 0.2)])
## [1] 176
sum(AllEstimatedFdr < 0.1)
## [1] 1425
sum(AllEstimatedFdr < 0.2)
## [1] 1853
max(estimatedFdr[which(EssentialGenes$essential == 1)])
## [1] 0.8458869
hist(estimatedFdr, breaks = 100, col = "grey")
fdr.curve <- function(thresh, fdrs, baseline){
w = which(fdrs < thresh)
if(length(w) > 0){
return(sum(1 - baseline[w])/length(w))
}
else{
return(NA)
}
}
s = seq(from = 0, to = 1, length = 1001)
f = sapply(s, function(t) fdr.curve(t, estimatedFdr, EssentialGenes$essential))
plot(c(0, s[!is.na(f)]), c(0, f[!is.na(f)]), type = "l", xlab = "estimated FDR", ylab = "empirical FDR", main = "CRISPhieRmix estimated FDR", xlim = c(0, 1), ylim = c(0, 1), lwd = 2, col = "dodgerblue")
abline(0, 1, lty = 2)
dim(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle)
## [1] 15975 8
head(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle)
## Gene sgRNA condition.beta condition.z condition.p.value condition.fdr
## 1 RNF14 20 0.106220 4.31670 0.0012645 0.033611
## 2 DUOXA1 10 0.019616 0.58528 0.3902300 0.875420
## 3 RNF17 10 0.104460 2.98260 0.0014523 0.038095
## 4 RNF10 20 0.016656 0.68569 0.4333900 0.886710
## 5 RNF11 10 0.016568 0.48065 0.4345700 0.887640
## 6 RNF13 10 0.040640 1.17980 0.1515500 0.691240
## condition.wald.p.value condition.wald.fdr
## 1 1.5836e-05 0.00017024
## 2 5.5836e-01 0.85116000
## 3 2.8577e-03 0.02196900
## 4 4.9291e-01 0.81930000
## 5 6.3077e-01 0.88152000
## 6 2.3810e-01 0.60645000
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle[match(EssentialGenes$genes, WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle$Gene), ]
#WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential[-which(is.na(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$Gene)), ]
dim(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential)
## [1] 677 8
head(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential)
## Gene sgRNA condition.beta condition.z condition.p.value
## 6506 ALYREF 10 -0.272860 -7.9535 0.0236490
## 8102 AQR 20 -1.163200 -38.7860 0.0000000
## 4658 ARCN1 20 -0.889430 -33.6890 0.0000000
## 15106 BRIX1 20 -0.027779 -1.1841 0.9162200
## 5560 C12orf66 10 0.302800 8.7749 0.0000000
## 7228 CCT3 10 -0.385800 -11.2170 0.0037058
## condition.fdr condition.wald.p.value condition.wald.fdr
## 6506 0.287080 1.8136e-15 3.0788e-14
## 8102 0.000000 0.0000e+00 0.0000e+00
## 4658 0.000000 8.3883e-249 1.5228e-246
## 15106 0.988650 2.3637e-01 6.0484e-01
## 5560 0.000000 1.7102e-18 3.1512e-17
## 7228 0.079892 3.3534e-29 7.5028e-28
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential.perm.roc = roc(EssentialGenes$essential, WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr)
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential.perm.roc
##
## Call:
## roc.default(response = EssentialGenes$essential, predictor = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr)
##
## Data: WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr in 460 controls (EssentialGenes$essential 0) > 217 cases (EssentialGenes$essential 1).
## Area under the curve: 0.8652
1 - sum(EssentialGenes$essential[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr < 0.1)])/sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr < 0.1)
## [1] 0.02752294
sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr < 0.1)
## [1] 109
sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle$condition.fdr < 0.1)
## [1] 795
sum(EssentialGenes$essential[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr < 0.1)])
## [1] 106
1 - sum(EssentialGenes$essential[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr < 0.2)])/sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr < 0.2)
## [1] 0.04545455
sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr < 0.2)
## [1] 132
sum(EssentialGenes$essential[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr < 0.2)])
## [1] 126
sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle$condition.fdr < 0.2)
## [1] 1024
hist(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr, breaks = 100, col = "grey")
f = sapply(s, function(t) fdr.curve(t, WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr, EssentialGenes$essential))
plot(c(0, s[!is.na(f)]), c(0, f[!is.na(f)]), type = "l", xlab = "estimated FDR", ylab = "empirical FDR", main = "MAGeCK permutation FDR", xlim = c(0, 1), ylim = c(0, 1), lwd = 2, col = "deeppink")
abline(0, 1, lty = 2)
max(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr[which(EssentialGenes$essential == 1)])
## [1] 0.99795
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential.wald.roc = roc(EssentialGenes$essential, WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.wald.fdr)
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential.wald.roc
##
## Call:
## roc.default(response = EssentialGenes$essential, predictor = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.wald.fdr)
##
## Data: WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.wald.fdr in 460 controls (EssentialGenes$essential 0) > 217 cases (EssentialGenes$essential 1).
## Area under the curve: 0.9434
1 - sum(EssentialGenes$essential[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.wald.fdr < 0.1)])/sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.wald.fdr < 0.1)
## [1] 0.05154639
sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.wald.fdr < 0.1)
## [1] 194
1 - sum(EssentialGenes$essential[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.wald.fdr < 0.2)])/sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.wald.fdr < 0.2)
## [1] 0.09268293
sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.wald.fdr < 0.2)
## [1] 205
hist(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.wald.fdr, breaks = 100, col = "grey")
f = sapply(s, function(t) fdr.curve(t, WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.wald.fdr, EssentialGenes$essential))
plot(c(0, s[!is.na(f)]), c(0, f[!is.na(f)]), type = "l", xlab = "estimated FDR", ylab = "empirical FDR", main = "MAGeCK Wald FDR", xlim = c(0, 1), ylim = c(0, 1), lwd = 2, col = "darkviolet")
abline(0, 1, lty = 2)
sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0Normalmix$genes %in% ConstitutiveCoreEssentialGenes)
## [1] 217
sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0Normalmix$genes %in% NonEssentialGenes)
## [1] 460
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores.essential = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores[match(EssentialGenes$genes, WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores$gene), ]
dim(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores.essential)
## [1] 677 2
head(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores.essential)
## gene locfdr
## ALYREF ALYREF 0.000000e+00
## AQR AQR 0.000000e+00
## ARCN1 ARCN1 0.000000e+00
## BRIX1 BRIX1 9.704430e-01
## C12orf66 C12orf66 2.220446e-16
## CCT3 CCT3 0.000000e+00
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores.essential.roc = roc(EssentialGenes$essential, WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores.essential$locfdr)
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores.essential.roc
##
## Call:
## roc.default(response = EssentialGenes$essential, predictor = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores.essential$locfdr)
##
## Data: WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores.essential$locfdr in 460 controls (EssentialGenes$essential 0) > 217 cases (EssentialGenes$essential 1).
## Area under the curve: 0.9147
NormalEstimatedFdr = sapply(1:dim(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores.essential)[1], function(i) mean(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores$locfdr[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores$locfdr <= WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores.essential$locfdr[i])]))
NormalAllEstimatedFdr = sapply(1:dim(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores)[1], function(i) mean(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores$locfdr[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores$locfdr <= WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores$locfdr[i])]))
sum(NormalAllEstimatedFdr < 0.1)
## [1] 4476
1 - sum(EssentialGenes$essential[which(NormalEstimatedFdr < 0.1)])/sum(NormalEstimatedFdr < 0.1)
## [1] 0.2139918
sum(EssentialGenes$essential[which(NormalEstimatedFdr < 0.1)])
## [1] 191
sum(1 - EssentialGenes$essential[which(NormalEstimatedFdr < 0.1)])
## [1] 52
sum(NormalEstimatedFdr < 0.1)
## [1] 243
sum(NormalAllEstimatedFdr < 0.2)
## [1] 5258
1 - sum(EssentialGenes$essential[which(NormalEstimatedFdr < 0.2)])/sum(NormalEstimatedFdr < 0.2)
## [1] 0.2788104
sum(EssentialGenes$essential[which(NormalEstimatedFdr < 0.2)])
## [1] 194
sum(1 - EssentialGenes$essential[which(NormalEstimatedFdr < 0.2)])
## [1] 75
sum(NormalEstimatedFdr < 0.2)
## [1] 269
hist(estimatedFdr, breaks = 100, col = "grey")
f = sapply(s, function(t) fdr.curve(t, NormalEstimatedFdr, EssentialGenes$essential))
plot(c(0, s[!is.na(f)]), c(0, f[!is.na(f)]), type = "l", xlab = "estimated FDR", ylab = "empirical FDR", main = "Normal hierarchical mixture estimated FDR", xlim = c(0, 1), ylim = c(0, 1), lwd = 2, col = "darkblue")
abline(0, 1, lty = 2)
dim(EssentialGenes)
## [1] 677 2
sum(EssentialGenes$essential)
## [1] 217
MannWhitneyFdrs.essential = MannWhitneyFdrs[which(names(MannWhitneyFdrs) %in% EssentialGenes$gene)]
hist(MannWhitneyFdrs.essential, breaks = seq(from = 0, to = 1, length = 101), col = "grey")
sum(MannWhitneyFdrs < 0.1)
## [1] 1383
sum(MannWhitneyFdrs.essential < 0.1)
## [1] 148
sum(MannWhitneyFdrs.essential < 0.1 & EssentialGenes$essential == 1)
## [1] 52
sum(MannWhitneyFdrs < 0.2)
## [1] 1894
sum(MannWhitneyFdrs.essential < 0.2)
## [1] 166
sum(MannWhitneyFdrs.essential < 0.2 & EssentialGenes$essential == 1)
## [1] 58
MannWhitneyFdrs.essential.roc = roc(EssentialGenes$essential, MannWhitneyFdrs.essential)
MannWhitneyFdrs.essential.roc
##
## Call:
## roc.default(response = EssentialGenes$essential, predictor = MannWhitneyFdrs.essential)
##
## Data: MannWhitneyFdrs.essential in 460 controls (EssentialGenes$essential 0) < 217 cases (EssentialGenes$essential 1).
## Area under the curve: 0.5136
length(intersect(names(MannWhitneyFdrs.essential)[which(MannWhitneyFdrs.essential < 0.1)], WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$gene[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$estimatedFdr < 0.1)]))
## [1] 0
top3MannWhitneyFdrs.essential = top3MannWhitneyFdrs[which(names(top3MannWhitneyFdrs) %in% EssentialGenes$gene)]
hist(top3MannWhitneyFdrs.essential, breaks = seq(from = 0, to = 1, length = 101), col = "grey")
sum(top3MannWhitneyFdrs < 0.1)
## [1] 3550
sum(top3MannWhitneyFdrs.essential < 0.1)
## [1] 227
sum(top3MannWhitneyFdrs.essential < 0.1 & EssentialGenes$essential == 1)
## [1] 78
sum(top3MannWhitneyFdrs < 0.2)
## [1] 5342
sum(top3MannWhitneyFdrs.essential < 0.2)
## [1] 291
sum(top3MannWhitneyFdrs.essential < 0.2 & EssentialGenes$essential == 1)
## [1] 95
top3MannWhitneyFdrs.essential.roc = roc(EssentialGenes$essential, top3MannWhitneyFdrs.essential)
top3MannWhitneyFdrs.essential.roc
##
## Call:
## roc.default(response = EssentialGenes$essential, predictor = top3MannWhitneyFdrs.essential)
##
## Data: top3MannWhitneyFdrs.essential in 460 controls (EssentialGenes$essential 0) > 217 cases (EssentialGenes$essential 1).
## Area under the curve: 0.5176
length(intersect(names(top3MannWhitneyFdrs.essential)[which(top3MannWhitneyFdrs.essential < 0.1)], WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$gene[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$estimatedFdr < 0.1)]))
## [1] 0
length(intersect(names(MannWhitneyFdrs.essential)[which(MannWhitneyFdrs.essential < 0.1)], intersect(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$Gene[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr < 0.1)], WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$gene[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$estimatedFdr < 0.1)])))
## [1] 0
cols = RColorBrewer::brewer.pal(8, "Set1")
plot(top3MannWhitneyFdrs.essential.roc, col = cols[1], lwd = 2, lty = 7, xlim = c(0, 1), ylim = c(0, 1), main = "ROC curves")
lines(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential.roc, col = cols[2], lwd = 2, lty = 4)
lines(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores.essential.roc, col = cols[3], lwd = 2, lty = 5)
lines(MannWhitneyFdrs.essential.roc, col = cols[4], lwd = 2, lty = 6)
lines(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential.perm.roc, col = cols[5], lwd = 2, lty = 2)
legend("bottomleft", legend = c(paste0("Mann-Whitney of top3 guides (AUC = ", round(top3MannWhitneyFdrs.essential.roc$auc, digits = 2), ")"), paste0("Mann-Whitney (AUC = ", round(MannWhitneyFdrs.essential.roc$auc, digits = 2), ")"), paste0("MAGeCK MLE (AUC = ", round(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential.perm.roc$auc, digits = 2), ")"), paste0("CRISPhieRmix (AUC = ", round(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential.roc$auc, digits = 2), ")"), paste0("normal hier mix (AUC = ", round(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores.essential.roc$auc, digits = 2), ")")), lty = c(7, 6, 2, 4, 5), lwd = 2, col = c("darkgreen", "chartreuse", "deeppink", "dodgerblue", "darkblue"))
pdf('~/sgRNA/sgRNA2Groups/tex_writing/figures/Gilbert2014/CRISPRiUntreated2day0RocCurves.pdf')
plot(top3MannWhitneyFdrs.essential.roc, col = "darkgreen", lwd = 2, lty = 7, xlim = c(0, 1), ylim = c(0, 1), main = "ROC curves")
lines(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential.roc, col = "dodgerblue", lwd = 2, lty = 4)
lines(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores.essential.roc, col = "darkblue", lwd = 2, lty = 5)
lines(MannWhitneyFdrs.essential.roc, col = "chartreuse", lwd = 2, lty = 6)
lines(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential.perm.roc, col = "deeppink", lwd = 2, lty = 2)
legend("bottomleft", legend = c(paste0("Mann-Whitney of top3 guides (AUC = ", round(top3MannWhitneyFdrs.essential.roc$auc, digits = 2), ")"), paste0("Mann-Whitney (AUC = ", round(MannWhitneyFdrs.essential.roc$auc, digits = 2), ")"), paste0("MAGeCK MLE (AUC = ", round(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential.perm.roc$auc, digits = 2), ")"), paste0("CRISPhieRmix (AUC = ", round(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential.roc$auc, digits = 2), ")"), paste0("normal hier mix (AUC = ", round(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores.essential.roc$auc, digits = 2), ")")), lty = c(7, 6, 2, 4, 5), lwd = 2, col = c("darkgreen", "chartreuse", "deeppink", "dodgerblue", "darkblue"))
dev.off()
## quartz_off_screen
## 2
b = seq(from = min(log2fc) - 0.1, to = max(log2fc) + 0.1, length = 201)
hist(log2fc, breaks = 80, probability = TRUE, main = "mixture fit to observations")
mixFit = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmix$mixFit
lines(b, mixFit$pq*dnorm(b, mixFit$mu, mixFit$sigma), lwd = 2, col = "darkgreen")
lines(b, (1 - mixFit$pq)*sn::dst(b, dp = mixFit$skewtFit$dp), col = "red", lwd = 2)
lines(b, mixFit$pq*dnorm(b, mixFit$mu, mixFit$sigma) + (1 - mixFit$pq)*sn::dst(b, dp = mixFit$skewtFit$dp), col = "darkviolet", lty = 2, lwd = 2)
pdf('~/sgRNA/sgRNA2Groups/tex_writing/figures/Gilbert2014/CRISPRiUntreated2day0CRISPhieRmixFit.pdf')
b = seq(from = min(log2fc) - 0.1, to = max(log2fc) + 0.1, length = 201)
hist(log2fc, breaks = 80, probability = TRUE, main = "mixture fit to observations")
mixFit = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmix$mixFit
lines(b, mixFit$pq*dnorm(b, mixFit$mu, mixFit$sigma), lwd = 2, col = "darkgreen")
lines(b, (1 - mixFit$pq)*sn::dst(b, dp = mixFit$skewtFit$dp), col = "red", lwd = 2)
lines(b, mixFit$pq*dnorm(b, mixFit$mu, mixFit$sigma) + (1 - mixFit$pq)*sn::dst(b, dp = mixFit$skewtFit$dp), col = "darkviolet", lty = 2, lwd = 2)
legend("topleft", legend = c("null fit", "dropout fit", "combined fit"), lty = c(1, 1, 2), col = c("darkgreen", "red", "darkviolet"))
dev.off()
## quartz_off_screen
## 2
mageckGenes = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$Gene[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr < 0.1 & EssentialGenes$essential == 1)]
length(mageckGenes)
## [1] 106
CRISPhieRmixGenes = EssentialGenes$gene[which(estimatedFdr < 0.1 & EssentialGenes$essential == 1)]
length(CRISPhieRmixGenes)
## [1] 172
length(intersect(mageckGenes, CRISPhieRmixGenes))
## [1] 105
GenesInCommon = intersect(mageckGenes, CRISPhieRmixGenes)
b = seq(from = min(log2fc) - 0.1, to = max(log2fc) + 0.1, length = 81)
hist(log2fc[which(geneIds %in% GenesInCommon)], breaks = b, col = "grey", probability = TRUE)
pdf('~/sgRNA/sgRNA2Groups/tex_writing/figures/Gilbert2014/GenesInCommonHist.pdf')
hist(log2fc[which(geneIds %in% GenesInCommon)], breaks = b, col = "grey", probability = TRUE)
dev.off()
## quartz_off_screen
## 2
GenesCRISPhieRspecific = setdiff(CRISPhieRmixGenes, GenesInCommon)
length(GenesCRISPhieRspecific)
## [1] 67
hist(log2fc[which(geneIds %in% GenesCRISPhieRspecific)], breaks = b, col = "grey", probability = TRUE)
pdf('~/sgRNA/sgRNA2Groups/tex_writing/figures/Gilbert2014/GenesCRISPhieRspecificHist.pdf')
hist(log2fc[which(geneIds %in% GenesCRISPhieRspecific)], breaks = b, col = "grey", probability = TRUE)
dev.off()
## quartz_off_screen
## 2
hist(negCtrl, breaks = b, probability = TRUE, col = rgb(1, 0, 0, 0.6), xlab = "log2fc")
hist(log2fc[which(geneIds == GenesInCommon[1])], breaks = b, probability = TRUE, col = rgb(0, 1, 0, 0.8), add = TRUE)
hist(log2fc[which(geneIds == GenesCRISPhieRspecific[1])], breaks = b, probability = TRUE, col = rgb(0, 0, 1, 0.8), add = TRUE)
legend("topleft", legend = c("negative control", GenesInCommon[1], GenesCRISPhieRspecific[1]), pch = 15, col = c(rgb(1, 0, 0, 0.6), rgb(0, 1, 0, 0.8), rgb(0, 0, 1, 0.8)))
y = data.frame(gene = c(rep("negCtrl", times = length(negCtrl)), rep(toString(GenesInCommon[1]), times = length(which(geneIds == GenesInCommon[1]))), rep(toString(GenesCRISPhieRspecific[1]), times = length(which(geneIds == GenesCRISPhieRspecific[1])))), log2fc = c(negCtrl, log2fc[which(geneIds == GenesInCommon[1])], log2fc[which(geneIds == GenesCRISPhieRspecific[1])]))
library(ggplot2)
library(ggjoy)
ggplot(y, aes(x = log2fc, y = gene)) + geom_joy(scale = 0.8) + theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))
## Picking joint bandwidth of 0.309
pdf('~/sgRNA/sgRNA2Groups/tex_writing/figures/Gilbert2014/GenesCRISPhieRspecificHistExampleGenes.pdf')
ggplot(y, aes(x = log2fc, y = gene)) + geom_joy(scale = 0.8) + theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))
## Picking joint bandwidth of 0.309
dev.off()
## quartz_off_screen
## 2
y = data.frame(gene = c(rep("negCtrl", times = length(negCtrl)), rep("Genes In Common", times = length(which(geneIds %in% GenesInCommon))), rep("CRISPhieRspecific", times = length(which(geneIds %in% GenesCRISPhieRspecific)))), log2fc = c(negCtrl, log2fc[which(geneIds %in% GenesInCommon)], log2fc[which(geneIds %in% GenesCRISPhieRspecific)]))
ggplot(y, aes(x = log2fc, y = gene)) + geom_joy(scale = 0.9) + theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))
## Picking joint bandwidth of 0.144
pdf('~/sgRNA/sgRNA2Groups/tex_writing/figures/Gilbert2014/GenesInCommonVsCRISPhieRspecificJoyplot.pdf')
ggplot(y, aes(x = log2fc, y = gene)) + geom_joy(scale = 0.9) + theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))
## Picking joint bandwidth of 0.144
dev.off()
## quartz_off_screen
## 2
# 2017 essential genes
EssentialGenes = read.table("~/Downloads/TableS2.txt")
EssentialGenes = EssentialGenes[,1]
length(EssentialGenes)
## [1] 684
NonEssentialGenes = scan("~/sgRNA/sgRNA2Groups/data/Weissman/NonEssentialGenes.txt", what = character())
length(NonEssentialGenes)
## [1] 927
EssentialGenes = data.frame(gene = c(sapply(EssentialGenes, toString), sapply(NonEssentialGenes, toString)), essential = c(rep(1, times = length(EssentialGenes)), rep(0, times = length(NonEssentialGenes))))
dim(EssentialGenes)
## [1] 1611 2
head(EssentialGenes)
## gene essential
## 1 AARS 1
## 2 ABCE1 1
## 3 ABCF1 1
## 4 ACTB 1
## 5 ACTL6A 1
## 6 ACTR10 1
tail(EssentialGenes)
## gene essential
## 1606 ZNF679 0
## 1607 ZNF804B 0
## 1608 ZNRF4 0
## 1609 ZP2 0
## 1610 ZP4 0
## 1611 ZSWIM2 0
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmix.estimatedFdr = sapply(1:length(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores$locfdr), function(i) mean(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores$locfdr[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores$locfdr <= WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores$locfdr[i])]) )
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores = data.frame(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores, estimatedFdr = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmix.estimatedFdr)
sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmix$genes %in% EssentialGenes$gene)
## [1] 1139
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores[match(EssentialGenes$gene, WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores$gene), ]
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential[-which(is.na(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$gene)), ]
dim(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential)
## [1] 1139 3
head(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential)
## gene locfdr estimatedFdr
## AARS AARS 0.000000e+00 0.000000e+00
## ABCE1 ABCE1 5.223723e-01 1.450369e-01
## ABCF1 ABCF1 2.146361e-01 2.867480e-02
## ACTB ACTB 9.398072e-01 5.846380e-01
## ACTL6A ACTL6A 0.000000e+00 0.000000e+00
## ACTR10 ACTR10 6.182854e-11 2.660263e-12
EssentialGenes = EssentialGenes[which(EssentialGenes$gene %in% WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$gene), ]
dim(EssentialGenes)
## [1] 1139 2
head(EssentialGenes)
## gene essential
## 1 AARS 1
## 2 ABCE1 1
## 3 ABCF1 1
## 4 ACTB 1
## 5 ACTL6A 1
## 6 ACTR10 1
sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$estimatedFdr < 0.1)
## [1] 436
length(which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$estimatedFdr < 0.1 & EssentialGenes$essential == 1))
## [1] 434
sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores$estimatedFdr < 0.1)
## [1] 1425
sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$estimatedFdr < 0.2)
## [1] 482
length(which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$estimatedFdr < 0.2 & EssentialGenes$essential == 1))
## [1] 477
hist(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$estimatedFdr, breaks = seq(from = 0, to = 1, length = 101), col = "grey")
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential.roc = roc(EssentialGenes$essential, WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$locfdr)
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential.roc
##
## Call:
## roc.default(response = EssentialGenes$essential, predictor = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$locfdr)
##
## Data: WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$locfdr in 460 controls (EssentialGenes$essential 0) > 679 cases (EssentialGenes$essential 1).
## Area under the curve: 0.8821
fdr.curve <- function(thresh, fdrs, baseline){
w = which(fdrs < thresh)
if(length(w) > 0){
return(sum(1 - baseline[w])/length(w))
}
else{
return(NA)
}
}
s = seq(from = 0, to = 1, length = 1001)
dim(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle)
## [1] 15975 8
head(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle)
## Gene sgRNA condition.beta condition.z condition.p.value condition.fdr
## 1 RNF14 20 0.106220 4.31670 0.0012645 0.033611
## 2 DUOXA1 10 0.019616 0.58528 0.3902300 0.875420
## 3 RNF17 10 0.104460 2.98260 0.0014523 0.038095
## 4 RNF10 20 0.016656 0.68569 0.4333900 0.886710
## 5 RNF11 10 0.016568 0.48065 0.4345700 0.887640
## 6 RNF13 10 0.040640 1.17980 0.1515500 0.691240
## condition.wald.p.value condition.wald.fdr
## 1 1.5836e-05 0.00017024
## 2 5.5836e-01 0.85116000
## 3 2.8577e-03 0.02196900
## 4 4.9291e-01 0.81930000
## 5 6.3077e-01 0.88152000
## 6 2.3810e-01 0.60645000
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle[match(EssentialGenes$gene, WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle$Gene), ]
#WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential[-which(is.na(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$Gene)), ]
dim(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential)
## [1] 1139 8
head(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential)
## Gene sgRNA condition.beta condition.z condition.p.value
## 8083 AARS 20 -1.105700 -43.18500 0.00000000
## 292 ABCE1 10 -0.199890 -6.03720 0.07482900
## 3584 ABCF1 9 -0.119750 -3.10410 0.27434000
## 6474 ACTB 10 -0.023119 -0.67452 0.97137000
## 15690 ACTL6A 20 -0.538490 -21.99600 0.00021283
## 4278 ACTR10 9 -0.800890 -19.66300 0.00000000
## condition.fdr condition.wald.p.value condition.wald.fdr
## 8083 0.0000000 0.0000e+00 0.0000e+00
## 292 0.5335100 1.5677e-09 2.1929e-08
## 3584 0.8248800 1.9086e-03 1.5192e-02
## 6474 0.9955800 4.9998e-01 8.2292e-01
## 15690 0.0076923 3.1238e-107 1.8146e-105
## 4278 0.0000000 4.4543e-86 2.0273e-84
sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr < 0.1)
## [1] 264
sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle$condition.fdr < 0.1)
## [1] 795
sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr < 0.1 & EssentialGenes$essential == 1)
## [1] 261
sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr < 0.2)
## [1] 300
sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle$condition.fdr < 0.2)
## [1] 1024
sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr < 0.2 & EssentialGenes$essential == 1)
## [1] 294
hist(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr, breaks = seq(from = 0, to = 1, length = 101), col = "grey")
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential.perm.roc = roc(EssentialGenes$essential, WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr)
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential.perm.roc
##
## Call:
## roc.default(response = EssentialGenes$essential, predictor = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr)
##
## Data: WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr in 460 controls (EssentialGenes$essential 0) > 679 cases (EssentialGenes$essential 1).
## Area under the curve: 0.7933
#sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.wald.fdr < 0.1)
#sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle$condition.wald.fdr < 0.1)
#sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.wald.fdr < 0.1 & EssentialGenes$essential == 1)
#sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.wald.fdr < 0.2)
#sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle$condition.wald.fdr < 0.2)
#sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.wald.fdr < 0.2 & EssentialGenes$essential == 1)
#hist(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.wald.fdr, breaks = seq(from = 0, to = 1, length = 101), col = "grey")
#WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential.wald.roc = roc(EssentialGenes$essential, WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.wald.fdr)
#WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential.wald.roc
length(intersect(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$Gene[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr < 0.1)], WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$gene[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$estimatedFdr < 0.1)]))
## [1] 261
MannWhitneyFdrs.essential = MannWhitneyFdrs[which(names(MannWhitneyFdrs) %in% EssentialGenes$gene)]
hist(MannWhitneyFdrs.essential, breaks = seq(from = 0, to = 1, length = 101), col = "grey")
sum(MannWhitneyFdrs < 0.1)
## [1] 1383
sum(MannWhitneyFdrs.essential < 0.1)
## [1] 355
sum(MannWhitneyFdrs.essential < 0.1 & EssentialGenes$essential == 1)
## [1] 210
sum(MannWhitneyFdrs < 0.2)
## [1] 1894
sum(MannWhitneyFdrs.essential < 0.2)
## [1] 397
sum(MannWhitneyFdrs.essential < 0.2 & EssentialGenes$essential == 1)
## [1] 229
MannWhitneyFdrs.essential.roc = roc(EssentialGenes$essential, MannWhitneyFdrs.essential)
MannWhitneyFdrs.essential.roc
##
## Call:
## roc.default(response = EssentialGenes$essential, predictor = MannWhitneyFdrs.essential)
##
## Data: MannWhitneyFdrs.essential in 460 controls (EssentialGenes$essential 0) < 679 cases (EssentialGenes$essential 1).
## Area under the curve: 0.5185
length(intersect(names(MannWhitneyFdrs.essential)[which(MannWhitneyFdrs.essential < 0.1)], WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$gene[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$estimatedFdr < 0.1)]))
## [1] 325
top3MannWhitneyFdrs.essential = top3MannWhitneyFdrs[which(names(top3MannWhitneyFdrs) %in% EssentialGenes$gene)]
hist(top3MannWhitneyFdrs.essential, breaks = seq(from = 0, to = 1, length = 101), col = "grey")
sum(top3MannWhitneyFdrs < 0.1)
## [1] 3550
sum(top3MannWhitneyFdrs.essential < 0.1)
## [1] 555
sum(top3MannWhitneyFdrs.essential < 0.1 & EssentialGenes$essential == 1)
## [1] 333
sum(top3MannWhitneyFdrs < 0.2)
## [1] 5342
sum(top3MannWhitneyFdrs.essential < 0.2)
## [1] 644
sum(top3MannWhitneyFdrs.essential < 0.2 & EssentialGenes$essential == 1)
## [1] 392
top3MannWhitneyFdrs.essential.roc = roc(EssentialGenes$essential, top3MannWhitneyFdrs.essential)
top3MannWhitneyFdrs.essential.roc
##
## Call:
## roc.default(response = EssentialGenes$essential, predictor = top3MannWhitneyFdrs.essential)
##
## Data: top3MannWhitneyFdrs.essential in 460 controls (EssentialGenes$essential 0) > 679 cases (EssentialGenes$essential 1).
## Area under the curve: 0.5113
length(intersect(names(top3MannWhitneyFdrs.essential)[which(top3MannWhitneyFdrs.essential < 0.1)], WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$gene[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$estimatedFdr < 0.1)]))
## [1] 426
length(intersect(names(MannWhitneyFdrs.essential)[which(MannWhitneyFdrs.essential < 0.1)], intersect(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$Gene[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr < 0.1)], WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$gene[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$estimatedFdr < 0.1)])))
## [1] 242
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores = data.frame(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores, estimatedFdr = sapply(1:dim(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores)[1], function(i) mean(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores$locfdr[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores$locfdr <= WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores$locfdr[i])])) )
sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0Normalmix$genes %in% EssentialGenes$gene)
## [1] 1139
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores.essential = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores[match(EssentialGenes$gene, WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores$gene), ]
dim(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores.essential)
## [1] 1139 3
head(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores.essential)
## gene locfdr estimatedFdr
## AARS AARS 0.0000000 0.0000000
## ABCE1 ABCE1 0.0000000 0.0000000
## ABCF1 ABCF1 0.0000000 0.0000000
## ACTB ACTB 0.7531346 0.1375928
## ACTL6A ACTL6A 0.0000000 0.0000000
## ACTR10 ACTR10 0.0000000 0.0000000
sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores.essential$estimatedFdr < 0.1)
## [1] 608
sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores.essential$estimatedFdr < 0.1)
## [1] 608
sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores.essential$estimatedFdr < 0.2)
## [1] 645
sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores.essential$estimatedFdr < 0.2)
## [1] 645
hist(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores.essential$estimatedFdr, breaks = seq(from = 0, to = 1, length = 101), col = "grey")
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores.essential.roc = roc(EssentialGenes$essential, WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores.essential$locfdr)
mageckGenes = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$Gene[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr < 0.1)]
length(mageckGenes)
# CRISPhieRmixGenes = EssentialGenes$gene[which(estimatedFdr < 0.1 & EssentialGenes$essential == 1)]
CRISPhieRmixGenes = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$gene[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$estimatedFdr < 0.1)]
length(CRISPhieRmixGenes)
length(intersect(mageckGenes, CRISPhieRmixGenes))
GenesInCommon = intersect(mageckGenes, CRISPhieRmixGenes)
b = seq(from = min(log2fc) - 0.1, to = max(log2fc) + 0.1, length = 81)
hist(log2fc[which(geneIds %in% GenesInCommon)], breaks = b, col = "grey", probability = TRUE)
GenesCRISPhieRspecific = setdiff(CRISPhieRmixGenes, GenesInCommon)
length(GenesCRISPhieRspecific)
hist(log2fc[which(geneIds %in% GenesCRISPhieRspecific)], breaks = b, col = "grey", probability = TRUE)
hist(negCtrl, breaks = b, probability = TRUE, col = rgb(1, 0, 0, 0.6))
hist(log2fc[which(geneIds == GenesInCommon[1])], breaks = b, probability = TRUE, add = TRUE, col = rgb(0, 1, 0, 0.8))
legend("topright", legend = c("negCtrl", GenesInCommon[1]), col = c(rgb(1, 0, 0, 0.6), rgb(0, 0, 1, 0.8)), pch = 15, lty = 0, lwd = 0)
hist(log2fc[which(geneIds == GenesCRISPhieRspecific[1])], breaks = b, probability = TRUE, add = TRUE, col = rgb(0, 0, 1, 0.8))
legend("topright", legend = c("negCtrl", GenesInCommon[1], GenesCRISPhieRspecific[1]), col = c(rgb(1, 0, 0, 0.6), rgb(0, 1, 1, 0.8), rgb(0, 0, 1, 0.8)), pch = 15, lty = 0, lwd = 0)
hist(negCtrl, breaks = b, probability = TRUE, col = rgb(1, 0, 0, 0.6))
hist(log2fc[which(geneIds == GenesInCommon[1])], breaks = b, probability = TRUE, add = TRUE, col = rgb(0, 1, 0, 0.8))
legend("topright", legend = c("negCtrl", GenesInCommon[2]), col = c(rgb(1, 0, 0, 0.6), rgb(0, 0, 1, 0.8)), pch = 15, lty = 0, lwd = 0)
hist(log2fc[which(geneIds == GenesCRISPhieRspecific[2])], breaks = b, probability = TRUE, add = TRUE, col = rgb(0, 0, 1, 0.8))
legend("topright", legend = c("negCtrl", GenesInCommon[2], GenesCRISPhieRspecific[2]), col = c(rgb(1, 0, 0, 0.6), rgb(0, 1, 1, 0.8), rgb(0, 0, 1, 0.8)), pch = 15, lty = 0, lwd = 0)
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$Gene == GenesCRISPhieRspecific[2]), ]
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$gene == GenesCRISPhieRspecific[2]), ]
Intersection of all genes
estimatedFdrAll = sapply(1:length(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores$locfdr), function(i) mean(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores$locfdr[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores$locfdr <= WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores$locfdr[i])]))
names(estimatedFdrAll) = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores$gene
length(estimatedFdrAll)
## [1] 15975
head(estimatedFdrAll)
## A2M A4GALT AACS AATF ABCA13
## 7.993683e-01 8.390956e-01 6.082177e-01 4.129825e-06 8.187087e-01
## ABCA1
## 5.824710e-01
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle[match(names(estimatedFdrAll), WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle$Gene), ]
dim(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle)
## [1] 15975 8
head(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle)
## Gene sgRNA condition.beta condition.z condition.p.value
## 6801 A2M 10 0.0209040 0.61278 0.372310
## 13709 A4GALT 20 0.0239440 1.00660 0.330370
## 14185 AACS 10 -0.0459290 -1.32040 0.728300
## 11453 AATF 10 -0.2994100 -8.68820 0.015574
## 5766 ABCA13 10 0.0565180 1.71710 0.057552
## 15152 ABCA1 10 0.0082852 0.24552 0.563520
## condition.fdr condition.wald.p.value condition.wald.fdr
## 6801 0.86992 5.4002e-01 8.4492e-01
## 13709 0.84835 3.1414e-01 6.8688e-01
## 14185 0.96133 1.8671e-01 5.4222e-01
## 11453 0.22455 3.6836e-18 6.7328e-17
## 5766 0.46952 8.5960e-02 3.4599e-01
## 15152 0.92511 8.0605e-01 9.4383e-01
sum(estimatedFdrAll < 0.1)
## [1] 1425
sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle$condition.fdr < 0.1)
## [1] 795
length(intersect(names(estimatedFdrAll)[which(estimatedFdrAll < 0.1)], WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle$Gene[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle$condition.fdr < 0.1)]))
## [1] 669